Os dados¶
Lendo e visualizandos os dados do Brasil do COVID fornecidos por
Os dados são atualizados diariamente… Então normalmente a cada dia esse pipeline pode mudar seus resultados.
[1]:
import pandas as pd
# Lendo os dados online - wcota
data_path = 'https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv'
online_data = pd.read_csv(data_path, delimiter=",")
online_data.head()
[1]:
date | country | state | city | newDeaths | deaths | newCases | totalCases | deathsMS | totalCasesMS | deaths_per_100k_inhabitants | totalCases_per_100k_inhabitants | deaths_by_totalCases | recovered | suspects | tests | tests_per_100k_inhabitants | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-02-25 | Brazil | SP | TOTAL | 0 | 0 | 1 | 1 | 0 | 0 | 0.0 | 0.00218 | 0.0 | NaN | NaN | NaN | NaN |
1 | 2020-02-25 | Brazil | TOTAL | TOTAL | 0 | 0 | 1 | 1 | 0 | 0 | 0.0 | 0.00048 | 0.0 | NaN | NaN | NaN | NaN |
2 | 2020-02-26 | Brazil | SP | TOTAL | 0 | 0 | 0 | 1 | 0 | 1 | 0.0 | 0.00218 | 0.0 | NaN | NaN | NaN | NaN |
3 | 2020-02-26 | Brazil | TOTAL | TOTAL | 0 | 0 | 0 | 1 | 0 | 1 | 0.0 | 0.00048 | 0.0 | NaN | NaN | NaN | NaN |
4 | 2020-02-27 | Brazil | SP | TOTAL | 0 | 0 | 0 | 1 | 0 | 1 | 0.0 | 0.00218 | 0.0 | NaN | NaN | NaN | NaN |
Filtrando e limpando os dados¶
[2]:
selected_state = "SP"
at_state = online_data['state']==selected_state
local_data = online_data[at_state]
local_data = local_data[local_data.recovered.notnull()]
#local_data = local_data.fillna(method="backfill")
local_data.head()
[2]:
date | country | state | city | newDeaths | deaths | newCases | totalCases | deathsMS | totalCasesMS | deaths_per_100k_inhabitants | totalCases_per_100k_inhabitants | deaths_by_totalCases | recovered | suspects | tests | tests_per_100k_inhabitants | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
347 | 2020-03-24 | Brazil | SP | TOTAL | 10 | 40 | 65 | 810 | 40 | 810 | 0.08711 | 1.76397 | 0.04938 | 1.0 | 4572.0 | NaN | NaN |
375 | 2020-03-25 | Brazil | SP | TOTAL | 8 | 48 | 52 | 862 | 48 | 862 | 0.10453 | 1.87722 | 0.05568 | 1.0 | 4300.0 | NaN | NaN |
403 | 2020-03-26 | Brazil | SP | TOTAL | 10 | 58 | 191 | 1053 | 58 | 1052 | 0.12631 | 2.29317 | 0.05508 | 1.0 | 14312.0 | NaN | NaN |
431 | 2020-03-27 | Brazil | SP | TOTAL | 10 | 68 | 170 | 1223 | 68 | 1223 | 0.14809 | 2.66338 | 0.05560 | 1.0 | 14312.0 | NaN | NaN |
459 | 2020-03-28 | Brazil | SP | TOTAL | 16 | 84 | 183 | 1406 | 84 | 1406 | 0.18293 | 3.06191 | 0.05974 | 1.0 | 14312.0 | NaN | NaN |
[3]:
import numpy as np
from datetime import datetime
first_date = local_data["date"].iloc[0]
first_date = datetime.fromisoformat(first_date)
if selected_state == "SP":
# N = 11869660
N = 44.01e6
elif selected_state == "TOTAL":
N = 220e6
I = list() # <- I(t)
R = local_data["recovered"].iloc[1:].to_numpy() # <- R(t)
M = local_data["newDeaths"].iloc[1:].to_numpy() # <- M(t)
nR = np.diff(local_data["recovered"].to_numpy()) # <- dR(t)/dt
nC = local_data["newCases"].iloc[1:].to_numpy() # <- nC(t)/dt
I = [ local_data["totalCases"].iloc[1] ] # I(0)
# I(t) <- I(t-1) + newCases(t) - newMortes(t) - newRecovered(t)
for t in range(len(M)-1):
I.append(I[-1] + nC[t] - M[t] - nR[t])
I = np.array(I)
Visualizando a evolução¶
[18]:
from bokeh.models import Legend, ColumnDataSource, RangeTool, LinearAxis, Range1d, HoverTool
from bokeh.palettes import brewer, Inferno256
from bokeh.plotting import figure, show
from bokeh.layouts import column
from bokeh.io import output_notebook
output_notebook()
from datetime import timedelta
# Criando o vetor de tempo
date_vec = [ first_date + timedelta(days=k) for k in range(len(M))]
# Criando os valores para legenda no plot
year = [str(int(d.year)) for d in date_vec ]
month = [("0"+str(int(d.month)))[-2:] for d in date_vec ]
day = [("0"+str(int(d.day)))[-2:] for d in date_vec ]
# Criando a fonte de dados
source = ColumnDataSource(data={
'Data' : date_vec,
'd': day, 'm': month, 'y': year,
'Infectados' : I,
'Removidos' : R,
'Mortes' : M,
})
# Criando a figura
p = figure(plot_height=500,
plot_width=600,
x_axis_type="datetime",
tools="",
toolbar_location=None,
y_axis_type="log",
title="Evolução do COVID - São Paulo")
# Preparando o estilo
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1
p.yaxis.axis_label = "Indivíduos"
p.xaxis.axis_label = "Dias"
# Incluindo as curvas
i_p = p.line(x='Data', y='Infectados', legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885", source=source)
m_p = p.line(x='Data', y='Mortes', legend_label="Mortes", line_cap="round", line_width=3, color="#de425b", source=source)
r_p = p.line(x='Data', y='Removidos', legend_label="Removidos", line_cap="round", line_width=3, color="#99d594", source=source)
# Colocando as legendas
p.legend.click_policy="hide"
p.legend.location = "top_left"
# Incluindo a ferramenta de hover
p.add_tools(HoverTool(
tooltips=[
( 'Indivíduos', '$y{i}'),
( 'Data', '@d/@m/@y' ),
],
renderers=[
r_p, i_p, m_p
]
))
show(p)
O problema¶
O conjunto de equações diferenciais que caracteriza o modelo é descrito abaixo. No modelo $:nbsphinx-math:beta `- :nbsphinx-math:text{representa a taxa de transmissão ou taxa efetiva de contato}` $ e \(r - \text{a taxa de remoção ou recuperação.}\)
Gostaríamos de identificar quais parâmetros \(\beta\) e \(r\) resultam num melhor ajuste do modelo para os dados de S,I e R
[5]:
# Importando o modelo SIR
from models import *
sir_model = ss.SIR(pop=N, focus=["I", "R"])
Estimando os parâmetros¶
Para estimarmos os parâmetros do modelo \(\mathbf{\beta}\) e \(\mathbf{r}\), vamos utilizar inicialmente o método de mínimos quadrados. Podemos então formular o problema a partir da Equação abaixo. Na Equação \(y_m(k)\) representa o dado real em cada amostra \(k\); \(y_s(\theta,k)\) representa o valor estimado a partir da simulação do modelo para uma determinada amostra \(k\) e \(\theta\) representa o vetor ed parâmetros \(\theta = [ \beta \; \; r]^T\).
A equação formula a pergunta: quais os valores de \(beta\) e \(r\) que minizam o erro quadrático quando comparados com os dados reais.
[12]:
import numpy as np
S = N - I - R
time = np.linspace(0, len(I), len(I))
# Estimando os parâmetros
sir_model.fit(S, I, R, time, sample_ponder=True, resample=True, beta_sens=[1000,10], r_sens=[1000,10])
r_included = True
├─ Resample from sizes ─ 65 65 65 65
└─ Resample to sizes ─ 1540 1540 1540 1540
├─ S(0) ─ I(0) ─ R(0) ─ [44009136.41827327, 862.5810526571099, 1.0006740750082481]
├─ beta ─ 1 r ─ 0.14285714285714285
├─ beta bound ─ 0.001 ─ 10
├─ r bound ─ 0.00014285714285714284 ─ 1.4285714285714284
├─ equation weights ─ [0.0016004861866103122, 1, 1]
└─ Defined at: 0.08388235287485474 ─ 0.010752489662406836
[13]:
sir_model.parameters[0]/sir_model.parameters[1] # Ro <- \beta * (1 / \r)
[13]:
7.801202838457649
[19]:
# Criando a figura
p1 = figure(plot_height=500,
plot_width=600,
x_axis_type="datetime",
tools="",
toolbar_location=None,
# y_axis_type="log",
title="Evolução do COVID - São Paulo")
# Preparando o estilo
p1.grid.grid_line_alpha = 0
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.yaxis.axis_label = "Indivíduos"
p1.xaxis.axis_label = "Dias"
# Incluindo as curvas
p1.line(time, I, legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885")
#p1.line(time, , legend_label="Mortes", line_cap="round", line_width=3, color="#de425b")
p1.line(time, R, legend_label="Removidos", line_cap="round", line_width=3, color="#99d594")
p1.scatter(sir_model.pipeline["resample"]["after"]["t"],
sir_model.pipeline["resample"]["after"]["I"],
marker="circle",line_color="#6666ee", fill_color="#ee6666", fill_alpha=0.5, size=3)
p1.scatter(sir_model.pipeline["resample"]["after"]["t"],
sir_model.pipeline["resample"]["after"]["R"],
marker="circle",line_color="#6666ee", fill_color="#ee6666", fill_alpha=0.5, size=3)
# Colocando as legendas
p1.legend.click_policy="hide"
p1.legend.location = "top_left"
show(p1)
[20]:
if r_included:
initial = [S[0], I[0], R[0]]
else:
initial = [S[0], I[0]]
results = sir_model.predict(initial, time)
[21]:
# Incluindo os dados de infectados
im_p = p.line(
date_vec, results[1],
legend_label="Infectados - Modelo",
line_width=4,
line_dash="dashed",
line_cap="round",
color="#f57f17"
)
# Incluindo os dados de recuperados
if r_included:
rm_p = p.line(
date_vec, results[2],
legend_label="Removidos - Modelo",
line_dash="dashed",
line_width=4,
line_cap="round",
color="#1b5e20"
)
show(p)
Predições utilizando o modelo¶
[17]:
# Criando os valores de tempo para previsão - 120 dias
t_sim = np.linspace(0, len(I) + 120, len(I) + 120)
date_vec_sim = [first_date + timedelta(days=k) for k in t_sim]
# Prevendo para os valores selecionados
prediction = sir_model.predict(initial, t_sim)
# Criando o gráfico com as predições
# Criando os valores para legenda no plot
year_sim = [str(int(d.year)) for d in date_vec_sim ]
month_sim = [("0"+str(int(d.month)))[-2:] for d in date_vec_sim ]
day_sim = [("0"+str(int(d.day)))[-2:] for d in date_vec_sim ]
if r_included:
accum_Infect = [0]
for i in N - prediction[1] - prediction[2]:
accum_Infect.append(accum_Infect[-1]+i)
accum_Infect = sir_model.parameters[1] * np.array(accum_Infect)
# Criando a fonte de dados
if r_included:
source = ColumnDataSource(data={
'Data' : date_vec,
'd': day, 'm': month, 'y': year,
'Infectados' : I,
'Removidos' : R,
'Mortes' : M,
'InfecModelo' : prediction[1],
'RemovModelo' : prediction[2],
'AccumInfect' : accum_Infect,
'SucetModelo' : N - prediction[1] - prediction[2],
'DataModelo' : date_vec_sim,
'ds': day_sim, 'ms': month_sim, 'ys': year_sim
})
else:
source = ColumnDataSource(data={
'Data' : date_vec,
'd': day, 'm': month, 'y': year,
'Infectados' : I,
'Removidos' : R,
'Mortes' : M,
'InfecModelo' : prediction[1],
'DataModelo' : date_vec_sim,
'ds': day_sim, 'ms': month_sim, 'ys': year_sim
})
# Criando a figura
p = figure(plot_height=700,
plot_width=800,
x_axis_type="datetime",
tools="",
toolbar_location=None,
y_axis_type="log",
title="Previsão do COVID - Brasil")
# Preparando o estilo
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1
p.yaxis.axis_label = "Indivíduos"
p.xaxis.axis_label = "Dias"
# Incluindo as curvas
i_p = p.line(x='Data', y='Infectados', legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885", source=source)
m_p = p.line(x='Data', y='Mortes', legend_label="Mortes", line_cap="round", line_width=3, color="#de425b", source=source)
r_p = p.line(x='Data', y='Removidos', legend_label="Removidos", line_cap="round", line_width=3, color="#99d594", source=source)
mp_p = p.line(x='DataModelo', y='InfecModelo', legend_label="Infectados - Modelo", line_dash="dashed", line_cap="round", line_width=4, color="#f57f17", source=source)
renders = [i_p, m_p, r_p, mp_p]
if r_included:
rp_p = p.line(x='DataModelo', y='RemovModelo', legend_label="Removidos - Modelo", line_dash="dashed", line_cap="round", line_width=4, color="#1b5e20", source=source)
renders.append(rp_p)
# Colocando as legendas
p.legend.click_policy="hide"
p.legend.location = "top_left"
# Incluindo a ferramenta de hover
p.add_tools(HoverTool(
tooltips=[
( 'Indivíduos', '$y{0.00 a}' ),
( 'Data', '@ds/@ms/@ys'),
],
renderers=renders
))
show(p)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('AccumInfect', 186), ('Data', 65), ('DataModelo', 185), ('InfecModelo', 185), ('Infectados', 65), ('Mortes', 65), ('RemovModelo', 185), ('Removidos', 65), ('SucetModelo', 185), ('d', 65), ('ds', 185), ('m', 65), ('ms', 185), ('y', 65), ('ys', 185)